#!/usr/bin/env python
import os
import sys
# Prefer a local development checkout of wuML when one is present, so the
# `import wuml` that follows resolves to it instead of an installed copy.
if os.path.exists('/home/chieh/code/wuML'):
    sys.path.insert(0, '/home/chieh/code/wuML')
import wuml
import numpy as np
regression test
The data is designed so that x₁ and x₂ have a minor positive impact, while x₄ has a major negative impact:
5x₁ + x₂ + x₁x₂ - 8x₄ - 2x₄² + δ
# Regression example: load the CSV, fit a model on it, then explain the
# model's predictions on that same data.
# NOTE(review): 'continuout' looks like a misspelling of 'continuous' --
# confirm against the label_type values wuml.wData accepts before changing it.
data = wuml.wData(
    '../../data/shap_regress_example_gaussian.csv',
    first_row_is_label=True,
    label_type='continuout',
    label_column_name='label',
    preprocess_data='center and scale',
)

# Model whose predictions are explained below.
model = wuml.regression(data, regressor='linear')

# Build the explainer with the 'shap' algorithm and apply it to the dataset.
E = wuml.explainer(data, model, explainer_algorithm='shap')
exp = E(data)
| | A | B | C | D | y | ŷ | Δy |
|---|---|---|---|---|---|---|---|
| 0 | 8.331189 | 0.289953 | -0.501698 | -18.577235 | -18.0403 | -13.174735 | 4.865565 |
| 1 | 8.868911 | -1.330877 | -0.487391 | 3.638149 | 7.6894 | 7.971847 | 0.282447 |
| 2 | -1.370143 | 0.302190 | -0.084151 | -11.272708 | -16.0089 | -15.141755 | 0.867145 |
| 3 | 3.119704 | -0.037743 | -0.234172 | -0.866588 | 1.1407 | -0.735743 | 1.876443 |
| 4 | 6.928437 | -0.422388 | -0.168741 | 10.163563 | 12.3256 | 13.783926 | 1.458326 |
| 5 | -14.097783 | 0.588115 | -0.444521 | 9.124437 | -8.9458 | -7.546695 | 1.399105 |
| 6 | 10.958492 | -1.892255 | -0.035028 | 3.970595 | 8.0164 | 10.284860 | 2.268460 |
| 7 | 7.129498 | 1.548023 | -0.089604 | -1.279825 | 8.0926 | 4.591148 | 3.501452 |
| 8 | -5.446437 | -2.511641 | 0.161915 | 0.780784 | -5.9542 | -9.732324 | 3.778124 |
| 9 | 5.557895 | 1.233859 | 0.181624 | 5.039437 | 11.0728 | 9.295871 | 1.776929 |
| 10 | -6.281854 | -1.851778 | 0.841435 | -15.883307 | -28.3988 | -25.892449 | 2.506351 |
| 11 | -3.482064 | -0.696429 | 0.614578 | -4.987805 | -10.1866 | -11.268665 | 1.082065 |
| 12 | -9.218802 | -0.431213 | 0.435844 | -1.360614 | -11.3402 | -13.291730 | 1.951530 |
| 13 | -3.487779 | -1.570089 | 0.001989 | -1.745063 | -6.9249 | -9.517886 | 2.592986 |
| 14 | -0.488487 | 0.174995 | 0.305182 | 5.600323 | 3.2876 | 2.875069 | 0.412531 |
| 15 | -4.327872 | -0.604063 | 0.394624 | 18.262990 | 4.3769 | 11.008735 | 6.631835 |
| 16 | 0.087681 | -0.653717 | 0.803367 | -2.065437 | -3.7108 | -4.545051 | 0.834251 |
| 17 | -5.547747 | -0.119873 | -0.376839 | 1.034297 | -5.5989 | -7.727106 | 2.128206 |
| 18 | 5.085636 | -1.633863 | -0.213362 | 8.591409 | 7.5997 | 9.112875 | 1.513175 |
| 19 | -5.358116 | -0.861983 | 0.143756 | 1.710333 | -4.8953 | -7.082954 | 2.187654 |
| 20 | -6.887116 | 0.878982 | -0.245077 | 16.497683 | 1.5804 | 7.527528 | 5.947128 |
| 21 | 6.898303 | 2.049863 | -0.601795 | 3.902805 | 13.5378 | 9.532232 | 4.005568 |
| 22 | -6.397192 | 1.059833 | 0.189578 | -9.119237 | -18.1950 | -16.983963 | 1.211037 |
| 23 | 0.248218 | 0.968172 | -0.190401 | -4.329414 | -4.4233 | -6.020369 | 1.597069 |
| 24 | -0.779428 | 1.920432 | -0.075597 | -1.500836 | -1.6582 | -3.152374 | 1.494174 |
| 25 | 8.949959 | -1.766824 | 0.623432 | -6.769828 | -4.0952 | -1.680205 | 2.414995 |
| 26 | -6.928679 | 2.105989 | 0.194780 | 9.173654 | -1.3490 | 1.828800 | 3.177800 |
| 27 | 9.156215 | 1.561084 | -0.946357 | -6.181082 | 5.0631 | 0.872915 | 4.190185 |
| 28 | -5.308240 | 2.066571 | 0.121946 | -5.219960 | -11.7617 | -11.056628 | 0.705072 |
| 29 | 4.087603 | -0.363321 | -0.319312 | -6.331519 | -4.6488 | -5.643493 | 0.994693 |
| | Most chosen |
|---|---|
| A | 15 |
| D | 14 |
| B | 1 |
| C | 0 |
| | Most weighted |
|---|---|
| D | 194.980919 |
| A | 170.815480 |
| B | 33.496118 |
| C | 10.028097 |
classification test<br>
# Classification example: load the CSV with discrete labels, fit a classifier
# on it, then explain the classifier's predictions on that same data.
# The variable names follow the regression section (data / model / E / exp) so
# that the classifier and explainer are built from this dataset.
data = wuml.wData(
    '../../data/shap_classifier_example.csv',
    first_row_is_label=True,
    label_type='discrete',
    label_column_name='label',
)
model = wuml.classification(data, classifier='LogisticRegression')

# Build the explainer with the 'shap' algorithm and apply it to the dataset.
E = wuml.explainer(data, model, explainer_algorithm='shap')
exp = E(data)